{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>joy</td>\n",
       "      <td>On days when I feel close to my partner and ot...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>fear</td>\n",
       "      <td>Every time I imagine that someone I love or I ...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>anger</td>\n",
       "      <td>When I had been obviously unjustly treated and...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>sadness</td>\n",
       "      <td>When I think about the short time that we live...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>disgust</td>\n",
       "      <td>At a gathering I found myself involuntarily si...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         0                                                  1    2\n",
       "0      joy  On days when I feel close to my partner and ot...  NaN\n",
       "1     fear  Every time I imagine that someone I love or I ...  NaN\n",
       "2    anger  When I had been obviously unjustly treated and...  NaN\n",
       "3  sadness  When I think about the short time that we live...  NaN\n",
       "4  disgust  At a gathering I found myself involuntarily si...  NaN"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd \n",
    "\n",
    "data = pd.read_csv('./data/ISEAR.csv', header=None)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "labels = data[0].values.tolist()\n",
    "sentences = data[1].values.tolist()\n",
    "# 按照4：1的比例随机划分训练集与测试集\n",
    "X_train, X_test, y_train, y_test = train_test_split(sentences, labels, test_size=0.2, random_state=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "vectorizer = TfidfVectorizer()\n",
    "# 这里仅提取tfidf特征\n",
    "X_train = vectorizer.fit_transform(X_train)\n",
    "X_test = vectorizer.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/six/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py:652: Warning: The least populated class in y has only 1 members, which is too few. The minimum number of members in any class cannot be less than n_splits=5.\n",
      "  % (min_groups, self.n_splits)), Warning)\n",
      "/home/six/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'C': 5}\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# 用逻辑回归进行分类\n",
    "lr = LogisticRegression(multi_class='auto')\n",
    "\n",
    "params = {'C': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]}\n",
    "# 使用GridSearch方法调参\n",
    "clf = GridSearchCV(lr, params, cv=5)\n",
    "clf.fit(X_train, y_train)\n",
    "print(clf.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 99  17  10  30   6  18  20]\n",
      " [ 32 123  14  13   7   6  20]\n",
      " [ 15  11 140   5   4  18  10]\n",
      " [ 31  14  13 103   7  10  34]\n",
      " [ 12   8   5   7 172  15  17]\n",
      " [ 24   6   7  15  28 142  12]\n",
      " [ 23  10  10  30   9  11 111]]\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "       anger       0.42      0.49      0.45       200\n",
      "     disgust       0.65      0.57      0.61       215\n",
      "        fear       0.70      0.69      0.70       203\n",
      "       guilt       0.51      0.49      0.50       212\n",
      "         joy       0.74      0.73      0.73       236\n",
      "     sadness       0.65      0.61      0.63       234\n",
      "       shame       0.50      0.54      0.52       204\n",
      "\n",
      "   micro avg       0.59      0.59      0.59      1504\n",
      "   macro avg       0.59      0.59      0.59      1504\n",
      "weighted avg       0.60      0.59      0.59      1504\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix, classification_report\n",
    "\n",
    "y_pred = clf.predict(X_test)\n",
    "# 分类混淆矩阵\n",
    "print(confusion_matrix(y_test, y_pred))\n",
    "# 分类结果报告\n",
    "print(classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看出，“fear”和“joy”相对更容易识别出，当然这只是个很简单的情感分类器"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
